# pip3 install transformers
# python3 deepseek_tokenizer.py
import transformers

chat_tokenizer_dir = "./"

tokenizer = transformers.AutoTokenizer.from_pretrained(
    chat_tokenizer_dir, trust_remote_code=True
)
# Print the token IDs for a sample string.
# "苏江通南" (the characters reordered) encodes to four tokens,
# while "江苏南通" (Jiangsu Nantong) encodes to just two tokens.
result = tokenizer.encode("江苏南通")
print(result)
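
# Optional sketch: encode both character orderings and compare token counts
# directly, rather than relying on the counts noted in the comments above.
# The exact numbers depend on the tokenizer files present in
# chat_tokenizer_dir, so treat them as illustrative.
for text in ("江苏南通", "苏江通南"):
    ids = tokenizer.encode(text)
    print(f"{text!r} -> {len(ids)} tokens: {ids}")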
